
import sys, os, argparse, codecs, string, re
from cn_tn import NSWNormalizer,CHINESE_PUNC_LIST,find_chinese




if __name__ == '__main__':
    # Demo entry point: push one hard-coded sentence through the
    # normalization pipeline and print the outcome.
    sample = 'qq12345组cp啊,温度23°C'

    # Step 1: NSW (Non-Standard-Word) normalization via cn_tn.
    normalized = NSWNormalizer(sample.strip()).normalize()

    # Step 2: remove every Chinese/English punctuation mark and every space.
    # Deleting them in a single translate pass yields the same string as
    # mapping punctuation to spaces and then stripping all spaces.
    unwanted = CHINESE_PUNC_LIST + string.punctuation + ' '
    stripped = normalized.translate(str.maketrans('', '', unwanted))

    # Step 3: swap the Latin abbreviations for Chinese-character spellings,
    # applied in this order ('qq' first, then 'cp').
    for latin, spoken in (('qq', '扣扣'), ('cp', '谁劈')):
        stripped = stripped.replace(latin, spoken)

    # Step 4: pass the string through cn_tn.find_chinese (presumably keeps
    # only the Chinese characters -- confirm against cn_tn) and print it.
    text = find_chinese(stripped)
    print(text)